<?php
/**
 * Created by PhpStorm.2017.1
 * User: Daniel<danieltang521@gmail.com>
 * Date: 5/10/23
 * Time: 10:22 AM
 * FILE_NAME: collectionImage.php
 * Desc:采集图片并保存到本地并将图片生成pdf电子书【为了采集女儿语文和数学课本的电子版】
 * Copyright: Daniel Shanghai China. All rights reserved.
 */

/**
 * Pick a random browser User-Agent string for outgoing HTTP requests.
 *
 * @return string One entry chosen at random from the built-in list below.
 */
function getAgent(){
    $agent = [
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        // Closing parenthesis restored; the entry was previously truncated.
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
    ];
    // array_rand() always returns a key that exists in the list, so the
    // selection stays correct when entries are added or removed (a fixed
    // numeric range would have to be kept in sync with the list by hand).
    return $agent[array_rand($agent)];
}
/**
 * Fetch a URL with an HTTP GET request via cURL.
 *
 * The request uses the URL itself as the Referer, a random User-Agent from
 * getAgent(), and a JSON Content-Type header. CURLOPT_TIMEOUT is 0, i.e. no
 * overall time limit is imposed on the transfer.
 *
 * @param string   $url  Address to request.
 * @param int|bool $exec When truthy, return the raw curl_exec() result
 *                       (the response body, or false on failure). When falsy
 *                       (the default), return a JSON-encoded envelope.
 * @return string|false  With $exec truthy: the raw body, or false on failure.
 *                       Otherwise the json_encode() result of
 *                       {status:"ok", content:<body>, msg:<curl_getinfo()>}
 *                       on success or {status:"fail", msg:<error text>} on
 *                       a cURL error.
 */
function curl_get($url,$exec=0)
{
    $ch = curl_init();
    if ($ch === false) {
        // No handle means no request can be made; report it in the same
        // shape the caller asked for.
        return $exec ? false : json_encode(array('status'=>'fail','msg'=>'curl_init() failed'));
    }

    curl_setopt($ch, CURLOPT_NOBODY, 0);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, array("Content-Type: application/json; charset=utf-8;"));
    // Present the requested URL as its own referer.
    curl_setopt($ch, CURLOPT_REFERER, $url);
    // 0 = no limit on how long the transfer may take.
    curl_setopt($ch, CURLOPT_TIMEOUT, 0);
    curl_setopt($ch, CURLOPT_USERAGENT, getAgent());
    // Return the response from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    $execres = curl_exec($ch);

    // Read all diagnostics while the handle is still open, then release it
    // before any return so the close is actually reached.
    $errno    = curl_errno($ch);
    $errormsg = curl_error($ch);
    $info     = curl_getinfo($ch);
    if (PHP_VERSION_ID < 80000) {
        // From PHP 8.0 on, curl_close() has no effect and the handle is
        // freed automatically; an explicit close only matters before that.
        curl_close($ch);
    }

    if ($exec) {
        return $execres;
    }
    if ($errno) {
        return json_encode(array('status'=>'fail','msg'=>$errormsg));
    }
    return json_encode(array('status'=>'ok','content'=>$execres,'msg'=>$info));
}


//$pdfname = '北师大数学一年级下册';
//$html = curl_get("https://mp.weixin.qq.com/s/5Z5lLHrNQ5hm44IHj77niQ",1);


//$pdfname = '北师大数学二年级上册';
//$html = curl_get("https://mp.weixin.qq.com/s/72fUwwmQMuYKoYfXOtB4GA",1);


//$pdfname = '北师大数学二年级下册';
//$html = curl_get("https://mp.weixin.qq.com/s/l2_YEz0oYCK_jPGZwZS5fA",1);

//$pdfname = '北师大数学三年级上册';
//$html = curl_get("https://mp.weixin.qq.com/s/peUncRNE7PL3VFqOUvnKHg",1);
//
// Title of the textbook being collected; it is also used as the output PDF file name.
$pdfname = '北师大数学三年级下册';
// Second argument 1 makes curl_get() return the raw response body rather than its JSON envelope.
$html = curl_get("https://mp.weixin.qq.com/s/-K8mCjiITQhPYdH127uHXA",1);
// In raw mode curl_get() returns false when the transfer fails; without a
// page body there is nothing to parse, so stop here with a visible message.
if (!is_string($html) || $html === '') {
    error_log('collectionImage: failed to download the article page for "'.$pdfname.'"');
    return false;
}


//var_dump($html);

// Capture the inner HTML of every paragraph <section>; the lazy quantifier
// keeps each section as its own match.
$partten = '/<section data-role="paragraph">([\s\S]*?)<\/section>/i';

preg_match_all($partten, (string)$html, $matches);

// Only the first matched section is scanned for images, so at least one
// section must exist before indexing into the result.
if (empty($matches[1])) {
    error_log('collectionImage: no <section data-role="paragraph"> block found in the page');
    return false;
}

// Image URLs are carried in data-src attributes.
$partten = '/data-src="([\s\S]*?)"/i';
preg_match_all($partten, $matches[1][0], $images);

$path = "./files/images/";
// file_put_contents() does not create missing directories, so make sure the
// target exists (the second is_dir() covers a concurrent creation).
if (!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)) {
    error_log('collectionImage: cannot create image directory '.$path);
    return false;
}

$localImages = [];
// Save each image locally, keeping page order.
foreach ($images[1] as $imageUrl) {
    if ($imageUrl === '') {
        // An empty data-src has nothing to download.
        continue;
    }
    $info = pathinfo($imageUrl);
    // The local file name is the URL's directory part with the mmbiz_jpg host prefix removed.
    $filename = $path.str_replace('https://mmbiz.qpic.cn/mmbiz_jpg/','',$info['dirname']).".jpeg";
    $imgContent = file_get_contents($imageUrl);
    if ($imgContent === false || file_put_contents($filename, $imgContent) === false) {
        // Skip pages that could not be fetched or written so a broken file
        // is not handed to the PDF step.
        error_log('collectionImage: failed to save image '.$imageUrl);
        continue;
    }
    $localImages[] = $filename;
}

/**
 * Generate the PDF: combine the downloaded images, in order, into a single
 * multi-page file named after $pdfname.
 */

$image_src_pdf = '/Users/mac-developer/Documents/collection-script/'.$pdfname.'.pdf';
if (empty($localImages)) {
    // Nothing was downloaded, so there are no pages to write.
    error_log('collectionImage: no images available, PDF not generated');
    return false;
}
try{
    $pdf = new Imagick($localImages);
    $pdf->setImageFormat('pdf');
    // Second argument true writes all images into one file.
    $pdf->writeImages($image_src_pdf, true);
    var_dump("保存成功");
}catch (ImagickException $e){
    // Report why generation failed instead of ending silently.
    error_log('collectionImage: PDF generation failed: '.$e->getMessage());
    return false;
}